Do the modules recapitulate sub-cell types clusters/states?

degs_sn = "/pastel/resources/sn_columbia_degs/Green_sn_may2024/SupplementaryTable2_AtlasCharacterization1.xlsx"
degs_sn = read_excel(degs_sn, sheet = "DEGs", col_names = TRUE)
degs_sn = as.data.frame(degs_sn)

# createDT(degs_sn)

Microglia

#############################################
### Microglia states vs Microglia modules
#############################################

microglia_states_df = degs_sn %>% 
  filter(cell.type == "microglia") %>%
  group_by(cell.type,state) %>% 
  reframe(n_genes = n(), gene, avg_log2FC, p_val_adj) %>%
  mutate(p_val_adj = ifelse(p_val_adj == 0, 1e-310, p_val_adj),
         category = paste0(cell.type, "_", state)) %>%
  group_by(category) %>% 
  mutate(stats = (p_val_adj*avg_log2FC)) %>%
  filter(!is.na(state))

degs_sn_clean = microglia_states_df
genes_lists = list()
for(cat_i in unique(degs_sn_clean$category)){
  genes_lists[[cat_i]] = unique(degs_sn_clean$gene[degs_sn_clean$category == cat_i])
}
genes_lists4GSEA = list()
for(cat_i in unique(degs_sn_clean$category)){
  genes_lists4GSEA[[cat_i]] = setNames(degs_sn_clean$stats[degs_sn_clean$category == cat_i], degs_sn_clean$gene[degs_sn_clean$category == cat_i])
}

### Modules from SE
modules_file = read.table(paste0(net_dir, "mic/geneBycluster.txt"), header = T) 
modules_size = as.data.frame( table(modules_file$cluster_lv3))
colnames(modules_size) = c("module", "n_nodes")

too_small = as.character( modules_size$module[modules_size$n_nodes < 30]) # Get modules < 30 nodes to be removed
clusters_list = as.character(unique(modules_file$cluster_lv3))
clusters_list = clusters_list[!clusters_list %in% too_small]

# Build list of outputs
output <- list()
for(i in 1:length(clusters_list)){
  m_cluster = clusters_list[i]
  module <- subset(modules_file, modules_file$cluster_lv3==m_cluster, select = c("gene_name"))
  
  # Header for iteration
  txt <- paste0("M", m_cluster, " (", length(module$gene_name), ")" )
  output[[txt]] <- module$gene_name
}  

# Overlap
gom.obj <- newGOM(genes_lists, output, genome.size = nrow(modules_file), spec=c('hg19.gene'))
pval_table = getMatrix(gom.obj, name="pval")
odds_table = getMatrix(gom.obj, "odds.ratio")
jaccard_table = getMatrix(gom.obj, "Jaccard")
mod_intersection = getMatrix(gom.obj, name="intersection")

rownames_plot = paste0(names(genes_lists)," (",sapply(genes_lists, length),")")
rownames(pval_table) = rownames_plot
rownames(odds_table) = rownames_plot
rownames(jaccard_table) = rownames_plot
rownames(mod_intersection) = rownames_plot

pval_bonf_df = as.data.frame(pval_table) %>% 
  mutate(across(dplyr::everything(), ~ p.adjust(.x, method = "bonferroni", n = length(.x)))) %>% 
  as.data.frame() 

pval_table.signif <- symnum(as.matrix(pval_bonf_df), corr = FALSE, na = FALSE, 
                                       cutpoints = c(0, 0.05, 1), 
                                       symbols = c(quote("\u2731")," "))

col_fun = colorRampPalette(c("white","#9467BD"))(5)

# grDevices::cairo_pdf(file = paste0(work_dir, "jaccard_mic.pdf"), width = 16, height = 6)
ComplexHeatmap::Heatmap(jaccard_table, 
                        col = col_fun, name = "Jaccard Index",
                        rect_gp = gpar(col = "white", lwd = 0.2), #lwd is the white line grid. 0.2 for 23 modules
                        column_title = "",
                        row_title = "Modules", row_names_side = c("left"),
                        row_order = order(sapply(genes_lists, length)), 
                        cluster_columns = T, show_column_dend = T,
                        cluster_rows = T, show_row_dend = T,
                        cell_fun = function(j, i, x, y, width, height, fill) {
                          grid.text( pval_table.signif[i,j], x, y, gp = gpar(fontsize = 12))
                          # grid.text(sprintf("%d\n(%s)", mod_intersection[i, j],formatC(pval_table[i, j], format = "e", digits = 1)), x, y, gp = gpar(fontsize = 9)) #fontsize = 10
                        }) 

# dev.off()
mod_intersection_list = getNestedList(gom.obj, name="intersection")
mod_intersection_list2 = lapply(mod_intersection_list, function(x) {
  x_df = map_df(x, ~{
  paste(.x, collapse = ",")
  }, .id = "id") %>% t() %>% as.data.frame()
  x_df = x_df %>% rownames_to_column("subcell_pop")
  colnames(x_df) = c("subcell_pop", "genes")
  x_df
})

mod_intersection_df = map_df(mod_intersection_list2, bind_rows,.id = "module")
mod_intersection_df = mod_intersection_df[,c("subcell_pop", "module", "genes")] %>%
  mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))

mod_intersection_num_df = as.data.frame(mod_intersection) %>% rownames_to_column("subcell_pop") %>%
  pivot_longer(cols = -subcell_pop, 
               names_to = "module", 
               values_to = "intersection") %>%
  mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))

mod_pval_bonf_df = pval_bonf_df %>% rownames_to_column("subcell_pop") %>%
           pivot_longer(cols = -subcell_pop, 
                        names_to = "module", 
                        values_to = "bonf_pval") %>%
  mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))

mod_stats = mod_pval_bonf_df %>%
  left_join(mod_intersection_num_df[,c("cell_mod","intersection")], by = c("cell_mod")) %>%
  left_join(mod_intersection_df[,c("cell_mod","genes")], by = c("cell_mod")) %>% distinct() %>%
  arrange(bonf_pval)

createDT(mod_stats)

Astrocytes

#############################################
### Astrocytes states vs Astrocytes modules
#############################################
astrocytes_states_df = degs_sn %>% 
  filter(cell.type == "astrocytes") %>%
  group_by(cell.type,state) %>% 
  reframe(n_genes = n(), gene, avg_log2FC, p_val_adj) %>%
  mutate(p_val_adj = ifelse(p_val_adj == 0, 1e-310, p_val_adj),
         category = paste0(cell.type, "_", state)) %>%
  group_by(category) %>% 
  mutate(stats = (p_val_adj*avg_log2FC)) %>%
  filter(!is.na(state))

degs_sn_clean = astrocytes_states_df
genes_lists = list()
for(cat_i in unique(degs_sn_clean$category)){
  genes_lists[[cat_i]] = degs_sn_clean$gene[degs_sn_clean$category == cat_i]
}
genes_lists4GSEA = list()
for(cat_i in unique(degs_sn_clean$category)){
  genes_lists4GSEA[[cat_i]] = setNames(degs_sn_clean$stats[degs_sn_clean$category == cat_i], degs_sn_clean$gene[degs_sn_clean$category == cat_i])
}

### Modules from SE
modules_file = read.table(paste0(net_dir, "ast/geneBycluster.txt"), header = T) 
modules_size = as.data.frame( table(modules_file$cluster_lv3))
colnames(modules_size) = c("module", "n_nodes")

too_small = as.character( modules_size$module[modules_size$n_nodes < 30]) # Get modules < 30 nodes to be removed
clusters_list = as.character(unique(modules_file$cluster_lv3))
clusters_list = clusters_list[!clusters_list %in% too_small]

# Build list of outputs
output <- list()
for(i in 1:length(clusters_list)){
  m_cluster = clusters_list[i]
  module <- subset(modules_file, modules_file$cluster_lv3==m_cluster, select = c("gene_name"))
  
  # Header for iteration
  txt <- paste0("M", m_cluster, " (", length(module$gene_name), ")" )
  output[[txt]] <- module$gene_name
}  

# Overlap
gom.obj <- newGOM(genes_lists, output, genome.size = nrow(modules_file), spec=c('hg19.gene'))
pval_table = getMatrix(gom.obj, name="pval")
odds_table = getMatrix(gom.obj, "odds.ratio")
jaccard_table = getMatrix(gom.obj, "Jaccard")
mod_intersection = getMatrix(gom.obj, name="intersection")

rownames_plot = paste0(names(genes_lists)," (",sapply(genes_lists, length),")")
rownames(pval_table) = rownames_plot
rownames(odds_table) = rownames_plot
rownames(jaccard_table) = rownames_plot
rownames(mod_intersection) = rownames_plot

pval_bonf_df = as.data.frame(pval_table) %>% 
  mutate(across(dplyr::everything(), ~ p.adjust(.x, method = "bonferroni", n = length(.x)))) %>% 
  as.data.frame() 

pval_table.signif <- symnum(as.matrix(pval_bonf_df), corr = FALSE, na = FALSE, 
                            cutpoints = c(0, 0.05, 1), 
                            symbols = c(quote("\u2731")," "))

col_fun = colorRampPalette(c("white","#1F77B4"))(5)

# grDevices::cairo_pdf(file = paste0(work_dir, "jaccard_ast.pdf"), width = 13, height = 4)
ComplexHeatmap::Heatmap(jaccard_table, 
                        col = col_fun, name = "Jaccard Index",
                        rect_gp = gpar(col = "white", lwd = 0.2), #lwd is the white line grid. 0.2 for 23 modules
                        column_title = "",
                        row_title = "Modules", row_names_side = c("left"),
                        row_order = order(sapply(genes_lists, length)), 
                        cluster_columns = T, show_column_dend = T,
                        cluster_rows = T, show_row_dend = T,
                        cell_fun = function(j, i, x, y, width, height, fill) {
                          grid.text( pval_table.signif[i,j], x, y, gp = gpar(fontsize = 12))
                          # grid.text(sprintf("%d\n(%s)", mod_intersection[i, j],formatC(pval_table[i, j], format = "e", digits = 1)), x, y, gp = gpar(fontsize = 9)) #fontsize = 10
                        }) 

# dev.off()
mod_intersection_list = getNestedList(gom.obj, name="intersection")
mod_intersection_list2 = lapply(mod_intersection_list, function(x) {
  x_df = map_df(x, ~{
  paste(.x, collapse = ",")
  }, .id = "id") %>% t() %>% as.data.frame()
  x_df = x_df %>% rownames_to_column("subcell_pop")
  colnames(x_df) = c("subcell_pop", "genes")
  x_df
})

mod_intersection_df = map_df(mod_intersection_list2, bind_rows,.id = "module")
mod_intersection_df = mod_intersection_df[,c("subcell_pop", "module", "genes")] %>%
  mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))

mod_intersection_num_df = as.data.frame(mod_intersection) %>% rownames_to_column("subcell_pop") %>%
  pivot_longer(cols = -subcell_pop, 
               names_to = "module", 
               values_to = "intersection") %>%
  mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))

mod_pval_bonf_df = pval_bonf_df %>% rownames_to_column("subcell_pop") %>%
           pivot_longer(cols = -subcell_pop, 
                        names_to = "module", 
                        values_to = "bonf_pval") %>%
  mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))

mod_stats = mod_pval_bonf_df %>%
  left_join(mod_intersection_num_df[,c("cell_mod","intersection")], by = c("cell_mod")) %>%
  left_join(mod_intersection_df[,c("cell_mod","genes")], by = c("cell_mod")) %>% distinct() %>%
  arrange(bonf_pval)

createDT(mod_stats)

Excitatory neurons

#############################################
### excitatory states vs excitatory modules
#############################################

excitatory_states_df = degs_sn %>% 
  filter(cell.type == "excitatory") %>%
  group_by(cell.type,state) %>% 
  reframe(n_genes = n(), gene, avg_log2FC, p_val_adj) %>%
  mutate(p_val_adj = ifelse(p_val_adj == 0, 1e-310, p_val_adj),
         category = paste0(cell.type, "_", state)) %>%
  group_by(category) %>% 
  mutate(stats = (p_val_adj*avg_log2FC)) %>%
  filter(!is.na(state))

degs_sn_clean = excitatory_states_df
genes_lists = list()
for(cat_i in unique(degs_sn_clean$category)){
  genes_lists[[cat_i]] = degs_sn_clean$gene[degs_sn_clean$category == cat_i]
}
genes_lists4GSEA = list()
for(cat_i in unique(degs_sn_clean$category)){
  genes_lists4GSEA[[cat_i]] = setNames(degs_sn_clean$stats[degs_sn_clean$category == cat_i], degs_sn_clean$gene[degs_sn_clean$category == cat_i])
}

### Modules from SE
modules_file = read.table(paste0(net_dir, "ext/geneBycluster.txt"), header = T) 
modules_size = as.data.frame( table(modules_file$cluster_lv3))
colnames(modules_size) = c("module", "n_nodes")

too_small = as.character( modules_size$module[modules_size$n_nodes < 30]) # Get modules < 30 nodes to be removed
clusters_list = as.character(unique(modules_file$cluster_lv3))
clusters_list = clusters_list[!clusters_list %in% too_small]

# Build list of outputs
output <- list()
for(i in 1:length(clusters_list)){
  m_cluster = clusters_list[i]
  module <- subset(modules_file, modules_file$cluster_lv3==m_cluster, select = c("gene_name"))
  
  # Header for iteration
  txt <- paste0("M", m_cluster, " (", length(module$gene_name), ")" )
  output[[txt]] <- module$gene_name
}  

# Overlap
gom.obj <- newGOM(genes_lists, output, genome.size = nrow(modules_file), spec=c('hg19.gene'))
pval_table = getMatrix(gom.obj, name="pval")
odds_table = getMatrix(gom.obj, "odds.ratio")
jaccard_table = getMatrix(gom.obj, "Jaccard")
mod_intersection = getMatrix(gom.obj, name="intersection")

rownames_plot = paste0(names(genes_lists)," (",sapply(genes_lists, length),")")
rownames(pval_table) = rownames_plot
rownames(odds_table) = rownames_plot
rownames(jaccard_table) = rownames_plot
rownames(mod_intersection) = rownames_plot

pval_bonf_df = as.data.frame(pval_table) %>% 
  mutate(across(dplyr::everything(), ~ p.adjust(.x, method = "bonferroni", n = length(.x)))) %>% 
  as.data.frame() 

pval_table.signif <- symnum(as.matrix(pval_bonf_df), corr = FALSE, na = FALSE, 
                            cutpoints = c(0, 0.05, 1), 
                            symbols = c(quote("\u2731")," "))

col_fun = colorRampPalette(c("white","#2ca02c"))(5)

ComplexHeatmap::Heatmap(jaccard_table, 
                        col = col_fun, name = "Jaccard Index",
                        rect_gp = gpar(col = "white", lwd = 0.2), #lwd is the white line grid. 0.2 for 23 modules
                        column_title = "",
                        row_title = "Modules", row_names_side = c("left"),
                        row_order = order(sapply(genes_lists, length)), 
                        cluster_columns = T, show_column_dend = T,
                        cluster_rows = T, show_row_dend = T,
                        cell_fun = function(j, i, x, y, width, height, fill) {
                          grid.text( pval_table.signif[i,j], x, y, gp = gpar(fontsize = 12))
                          # grid.text(sprintf("%d\n(%s)", mod_intersection[i, j],formatC(pval_table[i, j], format = "e", digits = 1)), x, y, gp = gpar(fontsize = 9)) #fontsize = 10
                        }) 

mod_intersection_list = getNestedList(gom.obj, name="intersection")
mod_intersection_list2 = lapply(mod_intersection_list, function(x) {
  x_df = map_df(x, ~{
  paste(.x, collapse = ",")
  }, .id = "id") %>% t() %>% as.data.frame()
  x_df = x_df %>% rownames_to_column("subcell_pop")
  colnames(x_df) = c("subcell_pop", "genes")
  x_df
})

mod_intersection_df = map_df(mod_intersection_list2, bind_rows,.id = "module")
mod_intersection_df = mod_intersection_df[,c("subcell_pop", "module", "genes")] %>%
  mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))

mod_intersection_num_df = as.data.frame(mod_intersection) %>% rownames_to_column("subcell_pop") %>%
  pivot_longer(cols = -subcell_pop, 
               names_to = "module", 
               values_to = "intersection") %>%
  mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))

mod_pval_bonf_df = pval_bonf_df %>% rownames_to_column("subcell_pop") %>%
           pivot_longer(cols = -subcell_pop, 
                        names_to = "module", 
                        values_to = "bonf_pval") %>%
  mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))

mod_stats = mod_pval_bonf_df %>%
  left_join(mod_intersection_num_df[,c("cell_mod","intersection")], by = c("cell_mod")) %>%
  left_join(mod_intersection_df[,c("cell_mod","genes")], by = c("cell_mod")) %>% distinct() %>%
  arrange(bonf_pval)

createDT(mod_stats)

Inhibitory neurons

#############################################
### inhibitory states vs inhibitory modules
#############################################

inhibitory_states_df = degs_sn %>% 
  filter(cell.type == "inhibitory") %>%
  group_by(cell.type,state) %>% 
  reframe(n_genes = n(), gene, avg_log2FC, p_val_adj) %>%
  mutate(p_val_adj = ifelse(p_val_adj == 0, 1e-310, p_val_adj),
         category = paste0(cell.type, "_", state)) %>%
  group_by(category) %>% 
  mutate(stats = (p_val_adj*avg_log2FC)) %>%
  filter(!is.na(state))

degs_sn_clean = inhibitory_states_df
genes_lists = list()
for(cat_i in unique(degs_sn_clean$category)){
  genes_lists[[cat_i]] = degs_sn_clean$gene[degs_sn_clean$category == cat_i]
}
genes_lists4GSEA = list()
for(cat_i in unique(degs_sn_clean$category)){
  genes_lists4GSEA[[cat_i]] = setNames(degs_sn_clean$stats[degs_sn_clean$category == cat_i], degs_sn_clean$gene[degs_sn_clean$category == cat_i])
}

### Modules from SE
modules_file = read.table(paste0(net_dir, "inh/geneBycluster.txt"), header = T) 
modules_size = as.data.frame( table(modules_file$cluster_lv3))
colnames(modules_size) = c("module", "n_nodes")

too_small = as.character( modules_size$module[modules_size$n_nodes < 30]) # Get modules < 30 nodes to be removed
clusters_list = as.character(unique(modules_file$cluster_lv3))
clusters_list = clusters_list[!clusters_list %in% too_small]

# Build list of outputs
output <- list()
for(i in 1:length(clusters_list)){
  m_cluster = clusters_list[i]
  module <- subset(modules_file, modules_file$cluster_lv3==m_cluster, select = c("gene_name"))
  
  # Header for iteration
  txt <- paste0("M", m_cluster, " (", length(module$gene_name), ")" )
  output[[txt]] <- module$gene_name
}  

# Overlap
gom.obj <- newGOM(genes_lists, output, genome.size = nrow(modules_file), spec=c('hg19.gene'))
pval_table = getMatrix(gom.obj, name="pval")
odds_table = getMatrix(gom.obj, "odds.ratio")
jaccard_table = getMatrix(gom.obj, "Jaccard")
mod_intersection = getMatrix(gom.obj, name="intersection")

rownames_plot = paste0(names(genes_lists)," (",sapply(genes_lists, length),")")
rownames(pval_table) = rownames_plot
rownames(odds_table) = rownames_plot
rownames(jaccard_table) = rownames_plot
rownames(mod_intersection) = rownames_plot

pval_bonf_df = as.data.frame(pval_table) %>% 
  mutate(across(dplyr::everything(), ~ p.adjust(.x, method = "bonferroni", n = length(.x)))) %>% 
  as.data.frame() 

pval_table.signif <- symnum(as.matrix(pval_bonf_df), corr = FALSE, na = FALSE, 
                            cutpoints = c(0, 0.05, 1), 
                            symbols = c(quote("\u2731")," "))

col_fun = colorRampPalette(c("white","#d62728"))(5)

ComplexHeatmap::Heatmap(jaccard_table, 
                        col = col_fun, name = "Jaccard Index",
                        rect_gp = gpar(col = "white", lwd = 0.2), #lwd is the white line grid. 0.2 for 23 modules
                        column_title = "",
                        row_title = "Modules", row_names_side = c("left"),
                        row_order = order(sapply(genes_lists, length)), 
                        cluster_columns = T, show_column_dend = T,
                        cluster_rows = T, show_row_dend = T,
                        cell_fun = function(j, i, x, y, width, height, fill) {
                          grid.text( pval_table.signif[i,j], x, y, gp = gpar(fontsize = 12))
                          # grid.text(sprintf("%d\n(%s)", mod_intersection[i, j],formatC(pval_table[i, j], format = "e", digits = 1)), x, y, gp = gpar(fontsize = 9)) #fontsize = 10
                        }) 

mod_intersection_list = getNestedList(gom.obj, name="intersection")
mod_intersection_list2 = lapply(mod_intersection_list, function(x) {
  x_df = map_df(x, ~{
  paste(.x, collapse = ",")
  }, .id = "id") %>% t() %>% as.data.frame()
  x_df = x_df %>% rownames_to_column("subcell_pop")
  colnames(x_df) = c("subcell_pop", "genes")
  x_df
})

mod_intersection_df = map_df(mod_intersection_list2, bind_rows,.id = "module")
mod_intersection_df = mod_intersection_df[,c("subcell_pop", "module", "genes")] %>%
  mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))

mod_intersection_num_df = as.data.frame(mod_intersection) %>% rownames_to_column("subcell_pop") %>%
  pivot_longer(cols = -subcell_pop, 
               names_to = "module", 
               values_to = "intersection") %>%
  mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))

mod_pval_bonf_df = pval_bonf_df %>% rownames_to_column("subcell_pop") %>%
           pivot_longer(cols = -subcell_pop, 
                        names_to = "module", 
                        values_to = "bonf_pval") %>%
  mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))

mod_stats = mod_pval_bonf_df %>%
  left_join(mod_intersection_num_df[,c("cell_mod","intersection")], by = c("cell_mod")) %>%
  left_join(mod_intersection_df[,c("cell_mod","genes")], by = c("cell_mod")) %>% distinct() %>%
  arrange(bonf_pval)

createDT(mod_stats)

Oligodendrocytes

#############################################
### oligodendroglia states vs oligodendroglia modules
#############################################

oligodendroglia_states_df = degs_sn %>% 
  filter(cell.type == "oligodendroglia") %>%
  group_by(cell.type,state) %>% 
  reframe(n_genes = n(), gene, avg_log2FC, p_val_adj) %>%
  mutate(p_val_adj = ifelse(p_val_adj == 0, 1e-310, p_val_adj),
         category = paste0(cell.type, "_", state)) %>%
  group_by(category) %>% 
  mutate(stats = (p_val_adj*avg_log2FC)) %>%
  filter(!is.na(state))

degs_sn_clean = oligodendroglia_states_df
genes_lists = list()
for(cat_i in unique(degs_sn_clean$category)){
  genes_lists[[cat_i]] = degs_sn_clean$gene[degs_sn_clean$category == cat_i]
}
genes_lists4GSEA = list()
for(cat_i in unique(degs_sn_clean$category)){
  genes_lists4GSEA[[cat_i]] = setNames(degs_sn_clean$stats[degs_sn_clean$category == cat_i], degs_sn_clean$gene[degs_sn_clean$category == cat_i])
}

### Modules from SE
modules_file = read.table(paste0(net_dir, "oli/geneBycluster.txt"), header = T) 
modules_size = as.data.frame( table(modules_file$cluster_lv3))
colnames(modules_size) = c("module", "n_nodes")

too_small = as.character( modules_size$module[modules_size$n_nodes < 30]) # Get modules < 30 nodes to be removed
clusters_list = as.character(unique(modules_file$cluster_lv3))
clusters_list = clusters_list[!clusters_list %in% too_small]

# Build list of outputs
output <- list()
for(i in 1:length(clusters_list)){
  m_cluster = clusters_list[i]
  module <- subset(modules_file, modules_file$cluster_lv3==m_cluster, select = c("gene_name"))
  
  # Header for iteration
  txt <- paste0("M", m_cluster, " (", length(module$gene_name), ")" )
  output[[txt]] <- module$gene_name
}  

# Overlap
gom.obj <- newGOM(genes_lists, output, genome.size = nrow(modules_file), spec=c('hg19.gene'))
pval_table = getMatrix(gom.obj, name="pval")
odds_table = getMatrix(gom.obj, "odds.ratio")
jaccard_table = getMatrix(gom.obj, "Jaccard")
mod_intersection = getMatrix(gom.obj, name="intersection")

rownames_plot = paste0(names(genes_lists)," (",sapply(genes_lists, length),")")
rownames(pval_table) = rownames_plot
rownames(odds_table) = rownames_plot
rownames(jaccard_table) = rownames_plot
rownames(mod_intersection) = rownames_plot

pval_bonf_df = as.data.frame(pval_table) %>% 
  mutate(across(dplyr::everything(), ~ p.adjust(.x, method = "bonferroni", n = length(.x)))) %>% 
  as.data.frame() 

pval_table.signif <- symnum(as.matrix(pval_bonf_df), corr = FALSE, na = FALSE, 
                            cutpoints = c(0, 0.05, 1), 
                            symbols = c(quote("\u2731")," "))

col_fun = colorRampPalette(c("white","#e377c2"))(5)

ComplexHeatmap::Heatmap(jaccard_table, 
                        col = col_fun, name = "Jaccard Index",
                        rect_gp = gpar(col = "white", lwd = 0.2), #lwd is the white line grid. 0.2 for 23 modules
                        column_title = "",
                        row_title = "Modules", row_names_side = c("left"),
                        row_order = order(sapply(genes_lists, length)), 
                        cluster_columns = T, show_column_dend = T,
                        cluster_rows = T, show_row_dend = T,
                        cell_fun = function(j, i, x, y, width, height, fill) {
                          grid.text( pval_table.signif[i,j], x, y, gp = gpar(fontsize = 12))
                          # grid.text(sprintf("%d\n(%s)", mod_intersection[i, j],formatC(pval_table[i, j], format = "e", digits = 1)), x, y, gp = gpar(fontsize = 9)) #fontsize = 10
                        }) 

mod_intersection_list = getNestedList(gom.obj, name="intersection")
mod_intersection_list2 = lapply(mod_intersection_list, function(x) {
  x_df = map_df(x, ~{
  paste(.x, collapse = ",")
  }, .id = "id") %>% t() %>% as.data.frame()
  x_df = x_df %>% rownames_to_column("subcell_pop")
  colnames(x_df) = c("subcell_pop", "genes")
  x_df
})

mod_intersection_df = map_df(mod_intersection_list2, bind_rows,.id = "module")
mod_intersection_df = mod_intersection_df[,c("subcell_pop", "module", "genes")] %>%
  mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))

mod_intersection_num_df = as.data.frame(mod_intersection) %>% rownames_to_column("subcell_pop") %>%
  pivot_longer(cols = -subcell_pop, 
               names_to = "module", 
               values_to = "intersection") %>%
  mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))

mod_pval_bonf_df = pval_bonf_df %>% rownames_to_column("subcell_pop") %>%
           pivot_longer(cols = -subcell_pop, 
                        names_to = "module", 
                        values_to = "bonf_pval") %>%
  mutate(cell_mod = paste0(gsub("(.*) (.*)","\\1",subcell_pop), "_", gsub("(.*) (.*)","\\1",module)))

mod_stats = mod_pval_bonf_df %>%
  left_join(mod_intersection_num_df[,c("cell_mod","intersection")], by = c("cell_mod")) %>%
  left_join(mod_intersection_df[,c("cell_mod","genes")], by = c("cell_mod")) %>% distinct() %>%
  arrange(bonf_pval)

createDT(mod_stats)
#############################################
### vascular states vs end modules
#############################################

vascular_states_df = degs_sn %>% 
  filter(cell.type == "vascular niche") %>%
  group_by(cell.type,state) %>% 
  reframe(n_genes = n(), gene, avg_log2FC, p_val_adj) %>%
  mutate(p_val_adj = ifelse(p_val_adj == 0, 1e-310, p_val_adj),
         category = paste0(cell.type, "_", state)) %>%
  group_by(category) %>% 
  mutate(stats = (p_val_adj*avg_log2FC)) %>%
  filter(!is.na(state))

degs_sn_clean = vascular_states_df
genes_lists = list()
for(cat_i in unique(degs_sn_clean$category)){
  genes_lists[[cat_i]] = degs_sn_clean$gene[degs_sn_clean$category == cat_i]
}
genes_lists4GSEA = list()
for(cat_i in unique(degs_sn_clean$category)){
  genes_lists4GSEA[[cat_i]] = setNames(degs_sn_clean$stats[degs_sn_clean$category == cat_i], degs_sn_clean$gene[degs_sn_clean$category == cat_i])
}

### Modules from SE
modules_file = read.table(paste0(net_dir, "end/geneBycluster.txt"), header = T) 
modules_size = as.data.frame( table(modules_file$cluster_lv3))
colnames(modules_size) = c("module", "n_nodes")

too_small = as.character( modules_size$module[modules_size$n_nodes < 30]) # Get modules < 30 nodes to be removed
clusters_list = as.character(unique(modules_file$cluster_lv3))
clusters_list = clusters_list[!clusters_list %in% too_small]

# Build list of outputs
output <- list()
for(i in 1:length(clusters_list)){
  m_cluster = clusters_list[i]
  module <- subset(modules_file, modules_file$cluster_lv3==m_cluster, select = c("gene_name"))
  
  # Header for iteration
  txt <- paste0("M", m_cluster, " (", length(module$gene_name), ")" )
  output[[txt]] <- module$gene_name
}  

# Overlap
gom.obj <- newGOM(genes_lists, output, genome.size = nrow(modules_file), spec=c('hg19.gene'))
pval_table = getMatrix(gom.obj, name="pval")
odds_table = getMatrix(gom.obj, "odds.ratio")
jaccard_table = getMatrix(gom.obj, "Jaccard")
mod_intersection = getMatrix(gom.obj, name="intersection")

rownames_plot = paste0(names(genes_lists)," (",sapply(genes_lists, length),")")
rownames(pval_table) = rownames_plot
rownames(odds_table) = rownames_plot
rownames(jaccard_table) = rownames_plot
rownames(mod_intersection) = rownames_plot

pval_bonf_df = as.data.frame(pval_table) %>% 
  mutate(across(dplyr::everything(), ~ p.adjust(.x, method = "bonferroni", n = length(.x)))) %>% 
  as.data.frame() 

pval_table.signif <- symnum(as.matrix(pval_bonf_df), corr = FALSE, na = FALSE, 
                            cutpoints = c(0, 0.05, 1), 
                            symbols = c(quote("\u2731")," "))

col_fun = colorRampPalette(c("white","#ff7f0e"))(5)

ComplexHeatmap::Heatmap(jaccard_table, 
                        col = col_fun, name = "Jaccard Index",
                        rect_gp = gpar(col = "white", lwd = 0.2), #lwd is the white line grid. 0.2 for 23 modules
                        column_title = "",
                        row_title = "Modules", row_names_side = c("left"),
                        row_order = order(sapply(genes_lists, length)), 
                        cluster_columns = T, show_column_dend = T,
                        cluster_rows = T, show_row_dend = T,
                        cell_fun = function(j, i, x, y, width, height, fill) {
                          grid.text( pval_table.signif[i,j], x, y, gp = gpar(fontsize = 12))
                          # grid.text(sprintf("%d\n(%s)", mod_intersection[i, j],formatC(pval_table[i, j], format = "e", digits = 1)), x, y, gp = gpar(fontsize = 9)) #fontsize = 10
                        }) 

Session Info

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] gtools_3.9.5       RColorBrewer_1.1-3 GeneOverlap_1.30.0 lubridate_1.9.3   
##  [5] forcats_1.0.0      stringr_1.5.1      dplyr_1.1.4        purrr_1.0.2       
##  [9] readr_2.1.5        tidyr_1.3.1        tibble_3.2.1       ggplot2_3.5.1     
## [13] tidyverse_2.0.0    readxl_1.4.3      
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.9            jsonlite_1.8.8        foreach_1.5.2        
##  [4] bslib_0.8.0           highr_0.11            stats4_4.1.2         
##  [7] cellranger_1.1.0      yaml_2.3.10           pillar_1.9.0         
## [10] glue_1.7.0            digest_0.6.36         colorspace_2.1-1     
## [13] htmltools_0.5.8.1     pkgconfig_2.0.3       GetoptLong_1.0.5     
## [16] magick_2.8.4          scales_1.3.0          tzdb_0.4.0           
## [19] timechange_0.3.0      generics_0.1.3        IRanges_2.28.0       
## [22] DT_0.33               cachem_1.1.0          withr_3.0.0          
## [25] BiocGenerics_0.40.0   cli_3.6.3             magrittr_2.0.3       
## [28] crayon_1.5.3          evaluate_0.24.0       fansi_1.0.6          
## [31] doParallel_1.0.17     gplots_3.1.3.1        Cairo_1.6-2          
## [34] tools_4.1.2           hms_1.1.3             GlobalOptions_0.1.2  
## [37] lifecycle_1.0.4       matrixStats_1.3.0     ComplexHeatmap_2.15.4
## [40] S4Vectors_0.32.4      munsell_0.5.1         cluster_2.1.2        
## [43] compiler_4.1.2        jquerylib_0.1.4       caTools_1.18.2       
## [46] rlang_1.1.4           iterators_1.0.14      rstudioapi_0.16.0    
## [49] htmlwidgets_1.6.4     rjson_0.2.21          circlize_0.4.16      
## [52] crosstalk_1.2.1       bitops_1.0-8          rmarkdown_2.27       
## [55] gtable_0.3.5          codetools_0.2-18      R6_2.5.1             
## [58] knitr_1.48            fastmap_1.2.0         utf8_1.2.4           
## [61] clue_0.3-65           KernSmooth_2.23-20    shape_1.4.6.1        
## [64] stringi_1.8.4         parallel_4.1.2        Rcpp_1.0.13          
## [67] vctrs_0.6.5           png_0.1-8             tidyselect_1.2.1     
## [70] xfun_0.46